
package minire;

import java.io.*;
import java.text.ParseException;
import java.util.*;

import dfabuilder.*;
import dfabuilder.DFA.TokenMatch;

/**
 * Used for scanning a stream using DFAs.  Implements some methods for common uses.
 * <p>
 * Regular expressions are registered with {@link #addRegex(String, String)}; each
 * one is parsed into an NFA, combined with the scanner built so far and converted
 * to a DFA.  The scan methods then feed input to that DFA one character at a time.
 * <p>
 * Input bytes are widened directly to <code>char</code> (one byte per character),
 * so multi-byte encodings are not decoded.
 *
 * @author Nicolas Papin
 */
public class DFAScanner
{
	//
	// CLASS/INSTANCE DATA
	//
	/**
	 * The DFA to scan using.  Stays null until the first regular expression is added.
	 */
	private DFA mScan;
	
	/**
	 * Token buffer (in case we need to maintain data from the passed in input stream).
	 * Holds every character consumed by the most recent {@link #labelToken(InputStream)} call.
	 */
	private String mBuffer;
	
	/**
	 * Whether the tables should be minimized.
	 */
	private final boolean mMinimize;
	
	//
	// CTOR
	//
	/**
	 * Creates a scanner with no regular expressions.
	 * 
	 * @param minimize whether the DFA should be minimized each time a regular expression is added.
	 */
	public DFAScanner(boolean minimize)
	{
		mMinimize = minimize;
	}
	
	/**
	 * Creates a scanner with no regular expressions that minimizes its DFA.
	 */
	public DFAScanner()
	{
		this(true);
	}
	
	//
	// PUBLIC METHODS
	//
	/**
	 * Returns the input buffer.
	 * 
	 * @return the characters consumed by the last {@link #labelToken(InputStream)} call,
	 *         or null if that method has not been called yet.
	 */
	public String getBuffer()
	{
		return mBuffer;
	}
	
	/**
	 * Adds a regular expression to the scanner, using a null label.
	 * 
	 * @param regex the regular expression.
	 */
	public void addRegex(String regex)
	{
		addRegex(regex, null);
	}
	
	/**
	 * Adds a regular expression with a label to the scanner.
	 * 
	 * @param regex the regular expression.
	 * @param label the label.
	 */
	public void addRegex(String regex, String label)
	{
		NFA nfa = Parser.fromString(regex);
		nfa.setGoalLabels(label);
		
		if(mScan == null)
		{
			mScan = DFA.fromNFA(nfa);
		}
		else
		{
			// Combine the new expression with everything registered so far.
			mScan = DFA.fromNFA(nfa.union(mScan));
		}
		
		if(mMinimize)
			mScan = mScan.minimize();
	}
	
	/**
	 * Returns the label of the token starting at the beginning of the stream.
	 * <p>
	 * Characters are read until the scanner rejects the input or the stream ends;
	 * the label reported by the last accepting step is returned.  Every character
	 * read, including a rejected one, is available afterwards through
	 * {@link #getBuffer()}.
	 * <p>
	 * NOTE(review): an accepting step whose label is null (for example a pattern
	 * added through {@link #addRegex(String)}) cannot be told apart from "no match"
	 * here and therefore also ends in a ParseException -- confirm this is intended.
	 * 
	 * @param stream the stream to search.
	 * @return the label; never null.
	 * @throws IOException if reading from the stream fails.
	 * @throws ParseException if no labelled match was found; carries the consumed
	 *         characters as its message and their count as the error offset.
	 * @throws IllegalStateException if no regular expression has been added yet.
	 */
	public String labelToken(InputStream stream) throws IOException, ParseException
	{
		requireScanner().reset();
		mBuffer = "";
		
		// Accumulate in a builder to avoid quadratic string concatenation.
		StringBuilder consumed = new StringBuilder();
		String longestLabel = null;
		
		try
		{
			int nextInt;
			while((nextInt = stream.read()) != -1)
			{
				char next = (char)nextInt;
				consumed.append(next);
				
				TokenMatch tm = mScan.test(Character.toString(next));
				if(tm.isAccepted())
				{
					longestLabel = tm.getLabel();
				}
				else if(tm.isRejected())
				{
					break;
				}
			}
		}
		finally
		{
			// Publish what was consumed even if the read failed part-way through.
			mBuffer = consumed.toString();
		}
		
		if(longestLabel == null)
			throw new ParseException(mBuffer, mBuffer.length());
		
		return longestLabel;
	}
	
	/**
	 * Finds all occurrences of the scanner's regular expressions in the given file.
	 * Uses longest matching and does not include overlapping occurrences.
	 * <p>
	 * Positions are tracked as <code>int</code>, so locations are only meaningful
	 * for files smaller than 2 GiB.
	 * 
	 * @param f the file to search.
	 * @return the matches in order of appearance; empty if there are none.
	 * @throws IOException if the file cannot be opened or read.
	 * @throws IllegalStateException if the file is not empty and no regular expression has been added yet.
	 */
	public List<MatchDescriptor> findAllInFile(File f) throws IOException
	{
		List<MatchDescriptor> ret = new ArrayList<MatchDescriptor>();
		RandomAccessFile raf = null;
		
		try
		{
			raf = new RandomAccessFile(f, "r");
			long fileLength = raf.length();
			int currPos = 0;
			
			while(currPos < fileLength)
			{
				raf.seek(currPos);
				requireScanner().reset();
				
				StringBuilder candidate = new StringBuilder();
				// Length of the longest accepted prefix of candidate; 0 means no match yet.
				int acceptedLength = 0;
				int nextInt;
				
				while((nextInt = raf.read()) != -1)
				{
					char next = (char)nextInt;
					TokenMatch tm = mScan.test(Character.toString(next));
					
					if(tm.isRejected())
					{
						break;
					}
					
					candidate.append(next);
					if(tm.isAccepted())
					{
						acceptedLength = candidate.length();
					}
				}
				
				if(acceptedLength > 0)
				{
					// The match starts where this scan started.
					ret.add(new MatchDescriptor(candidate.substring(0, acceptedLength), currPos));
					currPos += acceptedLength;
				}
				else
				{
					currPos++;
				}
			}
		}
		finally
		{
			if(raf != null)
				raf.close();
		}
		
		return ret;
	}
	
	/**
	 * Finds all occurrences of the scanner's regular expressions in the given string.
	 * Uses longest matching and does not include overlapping occurrences.
	 * 
	 * @param f the string to search.
	 * @return the matches in order of appearance; empty if there are none.
	 * @throws IllegalStateException if the string is not empty and no regular expression has been added yet.
	 */
	public List<MatchDescriptor> findAllInString(String f)
	{
		List<MatchDescriptor> ret = new ArrayList<MatchDescriptor>();
		int length = f.length();
		int currPos = 0;
		
		while(currPos < length)
		{
			requireScanner().reset();
			
			// Exclusive end index of the longest accepted match starting at currPos;
			// stays at currPos while nothing has been accepted.
			int acceptedEnd = currPos;
			
			for(int i = currPos; i < length; i++)
			{
				TokenMatch tm = mScan.test(Character.toString(f.charAt(i)));
				
				if(tm.isRejected())
				{
					break;
				}
				
				if(tm.isAccepted())
				{
					acceptedEnd = i + 1;
				}
			}
			
			if(acceptedEnd > currPos)
			{
				ret.add(new MatchDescriptor(f.substring(currPos, acceptedEnd), currPos));
				currPos = acceptedEnd;
			}
			else
			{
				currPos++;
			}
		}
		
		return ret;
	}
	
	//
	// PRIVATE METHODS
	//
	/**
	 * Returns the scanner DFA, failing with a clear message if none has been built yet.
	 * 
	 * @return the DFA; never null.
	 * @throws IllegalStateException if no regular expression has been added yet.
	 */
	private DFA requireScanner()
	{
		if(mScan == null)
			throw new IllegalStateException("No regular expression has been added to this scanner.");
		
		return mScan;
	}
	
	
	//
	// INNER CLASS
	//
	/**
	 * Describes a single match: the matched text and where it starts.
	 * <p>
	 * Kept as an inner (non-static) class so that existing code which instantiates
	 * it through a scanner instance keeps compiling.
	 */
	public class MatchDescriptor
	{
		//
		// CLASS/INSTANCE DATA
		//
		/**
		 * The string that was matched.
		 */
		private final String mString;
		
		/**
		 * The location that the string was matched at.
		 */
		private final int mLocation;
		
		
		//
		// CTOR
		//
		/**
		 * Creates a match descriptor.
		 * 
		 * @param str the string that was matched.
		 * @param loc the location that the string was matched at.
		 */
		public MatchDescriptor(String str, int loc)
		{
			mString = str;
			mLocation = loc;
		}
		
		
		//
		// PUBLIC METHODS
		//
		/**
		 * Returns the string.
		 * 
		 * @return the string.
		 */
		public String getString()
		{
			return mString;
		}
		
		/**
		 * Returns the location.
		 * 
		 * @return the location.
		 */
		public int getLocation()
		{
			return mLocation;
		}
		
		/**
		 * Returns a string representation of the match.
		 * 
		 * @return the string representation.
		 */
		@Override
		public String toString()
		{
			return "\"" + mString + "\" at " + mLocation;
		}
	}
}
